{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 聊天机器人"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "█\r"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Getting uuid of QR code.\n",
      "Downloading QR code.\n",
      "Please scan the QR code to log in.\n",
      "Please press confirm on your phone.\n",
      "Loading the contact, this may take a little while.\n",
      "Login successfully as 罗攀\n",
      "Start auto replying.\n",
      "LOG OUT!\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import itchat\n",
    "\n",
    "KEY = ''\n",
    "\n",
    "def get_response(msg):\n",
    "    # 这里我们就像在“3. 实现最简单的与图灵机器人的交互”中做的一样\n",
    "    # 构造了要发送给服务器的数据\n",
    "    apiUrl = 'http://www.tuling123.com/openapi/api'\n",
    "    data = {\n",
    "        'key'    : KEY,\n",
    "        'info'   : msg,\n",
    "        'userid' : 'wechat-robot',\n",
    "    }\n",
    "    try:\n",
    "        r = requests.post(apiUrl, data=data).json()\n",
    "        # 字典的get方法在字典没有'text'值的时候会返回None而不会抛出异常\n",
    "        return r.get('text')\n",
    "    # 为了防止服务器没有正常响应导致程序异常退出，这里用try-except捕获了异常\n",
    "    # 如果服务器没能正常交互（返回非json或无法连接），那么就会进入下面的return\n",
    "    except:\n",
    "        # 将会返回一个None\n",
    "        return\n",
    "\n",
    "# 这里是我们在“1. 实现微信消息的获取”中已经用到过的同样的注册方法\n",
    "@itchat.msg_register(itchat.content.TEXT)\n",
    "def tuling_reply(msg):\n",
    "    # 为了保证在图灵Key出现问题的时候仍旧可以回复，这里设置一个默认回复\n",
    "    defaultReply = 'I received: ' + msg['Text']\n",
    "    # 如果图灵Key出现问题，那么reply将会是None\n",
    "    reply = get_response(msg['Text'])\n",
    "    # a or b的意思是，如果a有内容，那么返回a，否则返回b\n",
    "    # 有内容一般就是指非空或者非None，你可以用`if a: print('True')`来测试\n",
    "    return reply or defaultReply\n",
    "\n",
    "# 为了让实验过程更加方便（修改程序不用多次扫码），我们使用热启动\n",
    "itchat.auto_login(hotReload=True)\n",
    "itchat.run()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 性别预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.naive_bayes import MultinomialNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NickName</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Province</th>\n",
       "      <th>City</th>\n",
       "      <th>Signature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>☀️</td>\n",
       "      <td>2</td>\n",
       "      <td>湖北</td>\n",
       "      <td>武汉</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>郭成</td>\n",
       "      <td>1</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>彭彬</td>\n",
       "      <td>1</td>\n",
       "      <td>云南</td>\n",
       "      <td>昆明</td>\n",
       "      <td>啦啦啦</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>科莫呗比</td>\n",
       "      <td>1</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Q</td>\n",
       "      <td>2</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>如果再见不能红着脸   是否还能红着眼</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  NickName  Sex Province City            Signature\n",
       "0       ☀️    2       湖北   武汉                  NaN\n",
       "1       郭成    1       湖南   常德                  NaN\n",
       "2       彭彬    1       云南   昆明                  啦啦啦\n",
       "3     科莫呗比    1       湖南   常德                  NaN\n",
       "4        Q    2       湖南   常德  如果再见不能红着脸   是否还能红着眼"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_data = pd.read_csv('friend.csv',encoding='utf-8')\n",
    "all_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NickName</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Province</th>\n",
       "      <th>City</th>\n",
       "      <th>Signature</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>☀️</td>\n",
       "      <td>2</td>\n",
       "      <td>湖北</td>\n",
       "      <td>武汉</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>郭成</td>\n",
       "      <td>1</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>彭彬</td>\n",
       "      <td>1</td>\n",
       "      <td>云南</td>\n",
       "      <td>昆明</td>\n",
       "      <td>啦啦啦</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>科莫呗比</td>\n",
       "      <td>1</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Q</td>\n",
       "      <td>2</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>如果再见不能红着脸   是否还能红着眼</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>一袭水袖舞倾城</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>谁的青春不迷茫</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>夏夜星空</td>\n",
       "      <td>2</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>此号停用</td>\n",
       "      <td>2</td>\n",
       "      <td>湖南</td>\n",
       "      <td>常德</td>\n",
       "      <td>做自己让以后的自己感谢的事！</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>夏</td>\n",
       "      <td>1</td>\n",
       "      <td>Abu Dhabi</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>盐析菜</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   NickName  Sex   Province City            Signature\n",
       "0        ☀️    2         湖北   武汉                  NaN\n",
       "1        郭成    1         湖南   常德                  NaN\n",
       "2        彭彬    1         云南   昆明                  啦啦啦\n",
       "3      科莫呗比    1         湖南   常德                  NaN\n",
       "4         Q    2         湖南   常德  如果再见不能红着脸   是否还能红着眼\n",
       "7   一袭水袖舞倾城    2        NaN  NaN              谁的青春不迷茫\n",
       "8      夏夜星空    2         湖南   常德                  NaN\n",
       "9      此号停用    2         湖南   常德       做自己让以后的自己感谢的事！\n",
       "10        夏    1  Abu Dhabi  NaN                  NaN\n",
       "11      盐析菜    1        NaN  NaN                  NaN"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = all_data[(all_data['Sex'] == 1) | (all_data['Sex'] == 2)]\n",
    "data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(469, 5)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(448, 5)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, Y_train, Y_test = train_test_split(data['NickName'], data['Sex'], test_size=0.2, random_state=22)\n",
    "\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "count_vect = CountVectorizer()\n",
    "X_train_cov = count_vect.fit_transform(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultinomialNB(alpha=0.0001, class_prior=None, fit_prior=True)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf = MultinomialNB(alpha=0.0001)\n",
    "clf.fit(X_train_cov, Y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.96368715083798884"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "clf.score(X_train_cov, Y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.57777777777777772"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_test_cov = count_vect.transform(X_test)\n",
    "clf.score(X_test_cov, Y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1], dtype=int64)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = ['陈傻逼','罗罗攀']\n",
    "X = count_vect.transform(test)\n",
    "clf.predict(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
